{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d95229e6-c581-4d8a-8d4d-02a8f9a5afe8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:06:23.092284Z",
     "iopub.status.busy": "2022-10-21T16:06:23.091804Z",
     "iopub.status.idle": "2022-10-21T16:06:23.424402Z",
     "shell.execute_reply": "2022-10-21T16:06:23.423921Z",
     "shell.execute_reply.started": "2022-10-21T16:06:23.092195Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Scikit-Learn Version : 1.1.2\n"
     ]
    }
   ],
   "source": [
    "import sklearn\n",
    "\n",
    "print(\"Scikit-Learn Version : {}\".format(sklearn.__version__))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cfe19e49-18c0-4d12-842c-26244e2f5b1b",
   "metadata": {},
   "source": [
    "https://coderzcolumn.com/tutorials/machine-learning/scikit-learn-incremental-learning-for-large-datasets"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2d10c6c-8753-4f34-b8b9-53ef682d169d",
   "metadata": {},
   "source": [
    "# 模型汇总"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5b28837f-a03c-486e-b14a-e5d6b290b7d4",
   "metadata": {},
   "source": [
    "- Regression\n",
    "    -   sklearn.linear_model.SGDRegressor\n",
    "    -   sklearn.linear_model.PassiveAggressiveRegressor\n",
    "    -   sklearn.neural_network.MLPRegressor\n",
    "- Classification\n",
    "    -   sklearn.naive_bayes.MultinomialNB\n",
    "    -   sklearn.naive_bayes.BernoulliNB\n",
    "    -   sklearn.linear_model.Perceptron\n",
    "    -   sklearn.linear_model.SGDClassifier\n",
    "    -   sklearn.linear_model.PassiveAggressiveClassifier\n",
    "    -   sklearn.neural_network.MLPClassifier\n",
    "- Clustering\n",
    "    -   sklearn.cluster.MiniBatchKMeans\n",
    "    -   sklearn.cluster.Birch\n",
    "- Preprocessing\n",
    "    -   sklearn.preprocessing.StandardScaler\n",
    "    -   sklearn.preprocessing.MinMaxScaler\n",
    "    -   sklearn.preprocessing.MaxAbsScaler\n",
    "- Decomposition / Dimensionality Reduction\n",
    "    -   sklearn.decomposition.MiniBatchDictionaryLearning\n",
    "    -   sklearn.decomposition.IncrementalPCA\n",
    "    -   sklearn.decomposition.LatentDirichletAllocation"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0677361b-c18b-49ed-a063-a3abc38030aa",
   "metadata": {},
   "source": [
    "# 回归案例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3230a4ea-bd57-449e-884d-fce5015a483e",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:07:25.866669Z",
     "iopub.status.busy": "2022-10-21T16:07:25.866241Z",
     "iopub.status.idle": "2022-10-21T16:07:27.023079Z",
     "shell.execute_reply": "2022-10-21T16:07:27.022639Z",
     "shell.execute_reply.started": "2022-10-21T16:07:25.866619Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((216000, 100), (24000, 100), (216000,), (24000,))"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, Y = datasets.make_regression(n_samples=240000, random_state=123)\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9, random_state=123)\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "479d4f58-a064-42e0-8a5e-eb21a120fe81",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:07:32.535491Z",
     "iopub.status.busy": "2022-10-21T16:07:32.534937Z",
     "iopub.status.idle": "2022-10-21T16:07:32.544348Z",
     "shell.execute_reply": "2022-10-21T16:07:32.543957Z",
     "shell.execute_reply.started": "2022-10-21T16:07:32.535438Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((9000, 24, 100), (1000, 24, 100), (9000, 24), (1000, 24))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test = X_train.reshape(-1,24,100), X_test.reshape(-1,24,100)\n",
    "Y_train, Y_test = Y_train.reshape(-1,24), Y_test.reshape(-1,24)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2296e80c-63c7-4bcb-b800-a0250d61d721",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:07:51.540111Z",
     "iopub.status.busy": "2022-10-21T16:07:51.539461Z",
     "iopub.status.idle": "2022-10-21T16:08:02.577989Z",
     "shell.execute_reply": "2022-10-21T16:08:02.577238Z",
     "shell.execute_reply.started": "2022-10-21T16:07:51.540030Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDRegressor\n",
    "\n",
    "regressor = SGDRegressor()\n",
    "epochs = 10\n",
    "\n",
    "for k in range(epochs): ## Number of loops through data\n",
    "    for i in range(X_train.shape[0]): ## Looping through batches\n",
    "        X_batch, Y_batch = X_train[i], Y_train[i]\n",
    "        regressor.partial_fit(X_batch, Y_batch) ## Partially fitting data in batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "94e7019d-0ab8-4177-8820-86e0a99f736d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:08:07.594732Z",
     "iopub.status.busy": "2022-10-21T16:08:07.594164Z",
     "iopub.status.idle": "2022-10-21T16:08:07.649950Z",
     "shell.execute_reply": "2022-10-21T16:08:07.649536Z",
     "shell.execute_reply.started": "2022-10-21T16:08:07.594681Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test MSE      : 0.0002487686327610577\n",
      "Test R2 Score : 0.9999999907247031\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import mean_squared_error, r2_score\n",
    "\n",
    "Y_test_preds = []\n",
    "for j in range(X_test.shape[0]): ## Looping through test batches for making predictions\n",
    "    Y_preds = regressor.predict(X_test[j])\n",
    "    Y_test_preds.extend(Y_preds.tolist())\n",
    "\n",
    "print(\"Test MSE      : {}\".format(mean_squared_error(Y_test.reshape(-1), Y_test_preds)))\n",
    "print(\"Test R2 Score : {}\".format(r2_score(Y_test.reshape(-1), Y_test_preds)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75eb44bd-2427-46d2-9e83-d5be2edbff3b",
   "metadata": {},
   "source": [
    "# 分类案例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a3f9f142-4eae-4204-a315-5812d7ee5601",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:09:04.614099Z",
     "iopub.status.busy": "2022-10-21T16:09:04.613541Z",
     "iopub.status.idle": "2022-10-21T16:09:04.715084Z",
     "shell.execute_reply": "2022-10-21T16:09:04.714392Z",
     "shell.execute_reply.started": "2022-10-21T16:09:04.614049Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((28800, 30), (3200, 30), (28800,), (3200,))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, Y = datasets.make_classification(n_samples=32000, n_features=30, n_informative=20, n_classes=2)\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9, random_state=123)\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f4416a01-a93f-4689-850b-0d6633c47d02",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:09:12.941212Z",
     "iopub.status.busy": "2022-10-21T16:09:12.940631Z",
     "iopub.status.idle": "2022-10-21T16:09:12.949604Z",
     "shell.execute_reply": "2022-10-21T16:09:12.949124Z",
     "shell.execute_reply.started": "2022-10-21T16:09:12.941160Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((900, 32, 30), (100, 32, 30), (900, 32), (100, 32))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test = X_train.reshape(-1,32,30), X_test.reshape(-1,32,30)\n",
    "Y_train, Y_test = Y_train.reshape(-1,32), Y_test.reshape(-1,32)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "9d1665d3-b3f2-40ff-97fc-52c36d59fed4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:09:39.068197Z",
     "iopub.status.busy": "2022-10-21T16:09:39.067588Z",
     "iopub.status.idle": "2022-10-21T16:09:42.175602Z",
     "shell.execute_reply": "2022-10-21T16:09:42.175105Z",
     "shell.execute_reply.started": "2022-10-21T16:09:39.068144Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDClassifier\n",
    "\n",
    "classifier = SGDClassifier(random_state=123)\n",
    "epochs = 10\n",
    "for k in range(epochs):\n",
    "    for i in range(X_train.shape[0]):\n",
    "        X_batch, Y_batch = X_train[i], Y_train[i]\n",
    "        classifier.partial_fit(X_batch, Y_batch, classes=list(range(2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "cac794f9-f9c3-4b7d-96aa-b100361e9bd8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:09:50.542073Z",
     "iopub.status.busy": "2022-10-21T16:09:50.541594Z",
     "iopub.status.idle": "2022-10-21T16:09:50.557692Z",
     "shell.execute_reply": "2022-10-21T16:09:50.557325Z",
     "shell.execute_reply.started": "2022-10-21T16:09:50.542025Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Test Accuracy      : 0.7278125\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "Y_test_preds = []\n",
    "for j in range(X_test.shape[0]):\n",
    "    Y_preds = classifier.predict(X_test[j])\n",
    "    Y_test_preds.extend(Y_preds.tolist())\n",
    "\n",
    "print(\"Test Accuracy      : {}\".format(accuracy_score(Y_test.reshape(-1), Y_test_preds)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bfa8aae0-296f-4664-b2b0-988dc755b8c5",
   "metadata": {},
   "source": [
    "# 聚类案例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "612ab22e-8944-49c3-bd99-0f510a7782a4",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:10:18.106953Z",
     "iopub.status.busy": "2022-10-21T16:10:18.106517Z",
     "iopub.status.idle": "2022-10-21T16:10:18.165641Z",
     "shell.execute_reply": "2022-10-21T16:10:18.164966Z",
     "shell.execute_reply.started": "2022-10-21T16:10:18.106919Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((28800, 30), (3200, 30), (28800,), (3200,))"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, Y = datasets.make_blobs(n_samples=32000, n_features=30, centers=5, cluster_std=0.7, random_state=12345)\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9, random_state=123)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ebb901eb-d12b-41aa-8cde-6c59949f25f2",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:10:23.536647Z",
     "iopub.status.busy": "2022-10-21T16:10:23.536067Z",
     "iopub.status.idle": "2022-10-21T16:10:23.546515Z",
     "shell.execute_reply": "2022-10-21T16:10:23.545820Z",
     "shell.execute_reply.started": "2022-10-21T16:10:23.536595Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((900, 32, 30), (100, 32, 30), (900, 32), (100, 32))"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test = X_train.reshape(-1,32,30), X_test.reshape(-1,32,30)\n",
    "Y_train, Y_test = Y_train.reshape(-1,32), Y_test.reshape(-1,32)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "61f07f58-f440-4294-b1eb-cd9ecd6b4d5a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:10:34.214089Z",
     "iopub.status.busy": "2022-10-21T16:10:34.213491Z",
     "iopub.status.idle": "2022-10-21T16:10:41.925992Z",
     "shell.execute_reply": "2022-10-21T16:10:41.925413Z",
     "shell.execute_reply.started": "2022-10-21T16:10:34.214035Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sklearn.cluster import MiniBatchKMeans\n",
    "\n",
    "clustering_algo = MiniBatchKMeans(n_clusters=5, random_state=123)\n",
    "epochs = 10\n",
    "for k in range(epochs):\n",
    "    for i in range(X_train.shape[0]):\n",
    "        X_batch, Y_batch = X_train[i], Y_train[i]\n",
    "        clustering_algo.partial_fit(X_batch, Y_batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ab1d49fb-8b17-4b87-9eaf-d01d7dfd231f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:11:05.543605Z",
     "iopub.status.busy": "2022-10-21T16:11:05.543175Z",
     "iopub.status.idle": "2022-10-21T16:11:05.598010Z",
     "shell.execute_reply": "2022-10-21T16:11:05.597460Z",
     "shell.execute_reply.started": "2022-10-21T16:11:05.543553Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rand_score      : 1.0\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import adjusted_rand_score\n",
    "\n",
    "Y_test_preds = []\n",
    "for j in range(X_test.shape[0]): \n",
    "    Y_preds = clustering_algo.predict(X_test[j])\n",
    "    Y_test_preds.extend(Y_preds.tolist())\n",
    "\n",
    "print(\"rand_score      : {}\".format(adjusted_rand_score(Y_test.reshape(-1), Y_test_preds)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f7873fc5-6042-4967-bcf2-5fcfd5cac22c",
   "metadata": {},
   "source": [
    "# 预处理案例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5f9211e3-0471-4be4-9f7f-4527f42148d6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:11:34.238704Z",
     "iopub.status.busy": "2022-10-21T16:11:34.238364Z",
     "iopub.status.idle": "2022-10-21T16:11:35.373745Z",
     "shell.execute_reply": "2022-10-21T16:11:35.373098Z",
     "shell.execute_reply.started": "2022-10-21T16:11:34.238680Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((216000, 100), (24000, 100), (216000,), (24000,))"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, Y = datasets.make_regression(n_samples=240000, random_state=123)\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9, random_state=123)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "50b11b53-519c-4c70-851f-440f9d9024d3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:11:40.540852Z",
     "iopub.status.busy": "2022-10-21T16:11:40.540415Z",
     "iopub.status.idle": "2022-10-21T16:11:40.550025Z",
     "shell.execute_reply": "2022-10-21T16:11:40.549532Z",
     "shell.execute_reply.started": "2022-10-21T16:11:40.540800Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((9000, 24, 100), (1000, 24, 100), (9000, 24), (1000, 24))"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test = X_train.reshape(-1,24,100), X_test.reshape(-1,24,100)\n",
    "Y_train, Y_test = Y_train.reshape(-1,24), Y_test.reshape(-1,24)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "3b361f69-f635-4ee4-b13b-a928dd885f3c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:12:00.612731Z",
     "iopub.status.busy": "2022-10-21T16:12:00.612305Z",
     "iopub.status.idle": "2022-10-21T16:12:01.948835Z",
     "shell.execute_reply": "2022-10-21T16:12:01.947912Z",
     "shell.execute_reply.started": "2022-10-21T16:12:00.612681Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "scaler = StandardScaler()\n",
    "\n",
    "for i in range(X_train.shape[0]):\n",
    "    X_batch, Y_batch = X_train[i], Y_train[i]\n",
    "    scaler.partial_fit(X_batch, Y_batch) ## Partially fitting data in batches"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7bb96c34-b8ad-466a-a2aa-65107e23b4f0",
   "metadata": {},
   "source": [
    "# 降维案例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "7329a4fb-fb52-46ee-803e-4508279517ed",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:12:31.807615Z",
     "iopub.status.busy": "2022-10-21T16:12:31.807078Z",
     "iopub.status.idle": "2022-10-21T16:12:31.856140Z",
     "shell.execute_reply": "2022-10-21T16:12:31.855679Z",
     "shell.execute_reply.started": "2022-10-21T16:12:31.807566Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((28800, 30), (3200, 30), (28800,), (3200,))"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "X, Y = datasets.make_classification(n_samples=32000, n_features=30)\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.9, random_state=123)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6a65ea95-d4f3-4b9b-8f58-367f364e765f",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:12:40.539669Z",
     "iopub.status.busy": "2022-10-21T16:12:40.539122Z",
     "iopub.status.idle": "2022-10-21T16:12:40.549298Z",
     "shell.execute_reply": "2022-10-21T16:12:40.548787Z",
     "shell.execute_reply.started": "2022-10-21T16:12:40.539617Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((900, 32, 30), (100, 32, 30), (900, 32), (100, 32))"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test = X_train.reshape(-1,32,30), X_test.reshape(-1,32,30)\n",
    "Y_train, Y_test = Y_train.reshape(-1,32), Y_test.reshape(-1,32)\n",
    "\n",
    "X_train.shape, X_test.shape, Y_train.shape, Y_test.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "690a125b-4b43-460c-a261-35c021137ee3",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-10-21T16:12:51.538048Z",
     "iopub.status.busy": "2022-10-21T16:12:51.537229Z",
     "iopub.status.idle": "2022-10-21T16:12:52.569625Z",
     "shell.execute_reply": "2022-10-21T16:12:52.568675Z",
     "shell.execute_reply.started": "2022-10-21T16:12:51.537958Z"
    }
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDClassifier\n",
    "from sklearn.decomposition import IncrementalPCA\n",
    "\n",
    "pca = IncrementalPCA(n_components=20)\n",
    "\n",
    "for i in range(X_train.shape[0]):\n",
    "    X_batch, Y_batch = X_train[i], Y_train[i]\n",
    "    pca.partial_fit(X_batch, Y_batch) ## Partially fitting data in batches"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dffb73df-5133-4783-981f-e69b1b078b52",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3.10"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
